From 867e2ab77c6218d249da738a01e01eff60d9bfd9 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= <pippin@gimp.org>
Date: Thu, 7 Sep 2017 01:07:28 +0200
Subject: [PATCH] extensions: keep old algoritmic float in parallel with table

The newly introduced table isn't bit accurate with our reference,
the ~3x faster float 2 half conversion is available with a higher
tolerance.
---
 extensions/float-half.c | 89 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/extensions/float-half.c b/extensions/float-half.c
index 3adc10c..5b9d219 100644
--- a/extensions/float-half.c
+++ b/extensions/float-half.c
@@ -278,6 +278,86 @@ conv_rgbaF_rgbaHalf (const Babl *conversion,const float *src, uint16_t *dst, lon
   return conv_yF_yHalf (conversion, src, dst, samples * 4) / 4;
 }
 
+static void singles2halfp2(void *target, const void *source, long numel)
+{
+    uint16_t *hp = (uint16_t *) target; // Type pun output as an unsigned 16-bit int
+    uint32_t *xp = (uint32_t *) source; // Type pun input as an unsigned 32-bit int
+    uint16_t    hs, he, hm;
+    uint32_t x, xs, xe, xm;
+    int hes;
+    
+    if( source == NULL || target == NULL ) { // Nothing to convert (e.g., imag part of pure real)
+        return;
+    }
+    while( numel-- ) {
+        x = *xp++;
+        if( (x & 0x7FFFFFFFu) == 0 ) {  // Signed zero
+            *hp++ = (uint16_t) (x >> 16);  // Return the signed zero
+        } else { // Not zero
+            xs = x & 0x80000000u;  // Pick off sign bit
+            xe = x & 0x7F800000u;  // Pick off exponent bits
+            xm = x & 0x007FFFFFu;  // Pick off mantissa bits
+            if( xe == 0 ) {  // Denormal will underflow, return a signed zero
+                *hp++ = (uint16_t) (xs >> 16);
+            } else if( xe == 0x7F800000u ) {  // Inf or NaN (all the exponent bits are set)
+                if( xm == 0 ) { // If mantissa is zero ...
+                    *hp++ = (uint16_t) ((xs >> 16) | 0x7C00u); // Signed Inf
+                } else {
+                    *hp++ = (uint16_t) 0xFE00u; // NaN, only 1st mantissa bit set
+                }
+            } else { // Normalized number
+                hs = (uint16_t) (xs >> 16); // Sign bit
+                hes = ((int)(xe >> 23)) - 127 + 15; // Exponent unbias the single, then bias the halfp
+                if( hes >= 0x1F ) {  // Overflow
+                    *hp++ = (uint16_t) ((xs >> 16) | 0x7C00u); // Signed Inf
+                } else if( hes <= 0 ) {  // Underflow
+                    if( (14 - hes) > 24 ) {  // Mantissa shifted all the way off & no rounding possibility
+                        hm = (uint16_t) 0u;  // Set mantissa to zero
+                    } else {
+                        xm |= 0x00800000u;  // Add the hidden leading bit
+                        hm = (uint16_t) (xm >> (14 - hes)); // Mantissa
+                        if( (xm >> (13 - hes)) & 0x00000001u ) // Check for rounding
+                            hm += (uint16_t) 1u; // Round, might overflow into exp bit, but this is OK
+                    }
+                    *hp++ = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
+                } else {
+                   he = (uint16_t) (hes << 10); // Exponent
+                    hm = (uint16_t) (xm >> 13); // Mantissa
+                    if( xm & 0x00001000u ) // Check for rounding
+                        *hp++ = (hs | he | hm) + (uint16_t) 1u; // Round, might overflow to inf, this is OK
+                    else
+                        *hp++ = (hs | he | hm);  // No rounding
+                }
+            }
+        }
+    }
+}
+
+static inline long
+conv2_yF_yHalf (const Babl *conversion,const float *src, uint16_t *dst, long samples)
+{
+  singles2halfp2 (dst, src, samples);
+  return samples;
+}
+
+static long
+conv2_yaF_yaHalf (const Babl *conversion,const float *src, uint16_t *dst, long samples)
+{
+  return conv2_yF_yHalf (conversion, src, dst, samples * 2) / 2;
+}
+
+static long
+conv2_rgbF_rgbHalf (const Babl *conversion,const float *src, uint16_t *dst, long samples)
+{
+  return conv2_yF_yHalf (conversion, src, dst, samples * 3) / 3;
+}
+
+static long
+conv2_rgbaF_rgbaHalf (const Babl *conversion,const float *src, uint16_t *dst, long samples)
+{
+  return conv2_yF_yHalf (conversion, src, dst, samples * 4) / 4;
+}
+
 int init (void);
 
 int
@@ -402,6 +482,11 @@ init (void)
   babl_conversion_new (src ## _linear, dst ## _linear, "linear", conv_ ## src ## _ ## dst, NULL); \
   babl_conversion_new (src ## _gamma, dst ## _gamma, "linear", conv_ ## src ## _ ## dst, NULL); \
 }
+#define CONV2(src, dst) \
+{ \
+  babl_conversion_new (src ## _linear, dst ## _linear, "linear", conv2_ ## src ## _ ## dst, NULL); \
+  babl_conversion_new (src ## _gamma, dst ## _gamma, "linear", conv2_ ## src ## _ ## dst, NULL); \
+}
 
   CONV(rgbaHalf, rgbaF);
   CONV(rgbHalf,  rgbF);
@@ -411,6 +496,10 @@ init (void)
   CONV(rgbF,     rgbHalf);
   CONV(yaF,      yaHalf);
   CONV(yF,       yHalf);
+  CONV2(rgbaF,    rgbaHalf);
+  CONV2(rgbF,     rgbHalf);
+  CONV2(yaF,      yaHalf);
+  CONV2(yF,       yHalf);
 
   return 0;
 }
-- 
2.30.2